import os
import random

def process_txt_file(input_file_path, output_file_path, percentage):
    """Filter an image list, keeping only a fraction of empty-label images.

    Each non-blank line of ``input_file_path`` is treated as an image path.
    For every image, the label file is the same path with its extension
    replaced by ``.txt``. Images are then handled as follows:

    * label file missing             -> image is dropped from the output
    * label file has non-blank text  -> image is always kept
    * label file empty / whitespace  -> image is kept only if it is picked
      in a random sample of ``int(count * percentage)`` such images

    The output file lists the always-kept images first (in input order),
    followed by the randomly sampled empty-label images, one path per line.

    Args:
        input_file_path: Text file with one image path per line.
        output_file_path: Destination file; overwritten if it exists.
        percentage: Fraction in ``[0, 1]`` of empty-label images to keep.

    Raises:
        ValueError: If ``percentage`` is outside ``[0, 1]``.
        OSError: If the input, a label file, or the output cannot be opened.
    """
    # Validate before touching any file so a bad argument never leaves a
    # truncated output behind and the error message names the real cause.
    if not 0 <= percentage <= 1:
        raise ValueError(
            f"percentage must be between 0 and 1, got {percentage!r}"
        )

    with open(input_file_path, 'r') as f:
        jpg_paths = [line.strip() for line in f]

    empty_txt_jpg_paths = []
    non_empty_txt_jpg_paths = []
    for jpg_path in jpg_paths:
        if not jpg_path:
            # Blank lines carry no path; without this guard they would map
            # to a label file literally named '.txt'.
            continue

        # Swap only the extension. str.replace('.jpg', '.txt') would also
        # rewrite '.jpg' inside directory names and would leave paths such
        # as 'x.JPG' untouched, causing the image itself to be read as text.
        txt_path = os.path.splitext(jpg_path)[0] + '.txt'
        if not os.path.isfile(txt_path):
            continue

        with open(txt_path, 'r') as f:
            has_labels = bool(f.read().strip())

        if has_labels:
            non_empty_txt_jpg_paths.append(jpg_path)
        else:
            empty_txt_jpg_paths.append(jpg_path)

    # int() truncates, so the sample never exceeds the requested fraction.
    num_to_keep = int(len(empty_txt_jpg_paths) * percentage)
    empty_jpg_paths_to_keep = random.sample(empty_txt_jpg_paths, num_to_keep)

    with open(output_file_path, 'w') as f:
        f.writelines(
            jpg_path + '\n'
            for jpg_path in non_empty_txt_jpg_paths + empty_jpg_paths_to_keep
        )

# Example run: with percentage=0 none of the empty-label images are sampled,
# so the output list contains only images whose label file has content.
# NOTE(review): this call executes whenever the module is loaded (including
# on import), not only when the file is run as a script.
process_txt_file(
    input_file_path='gt_guiji_all_20240823.txt',
    output_file_path='gt_guiji_all_20240823_empty_0.txt',
    percentage=0,
)